# -*- coding: utf-8 -*-
import scrapy
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import Rule,CrawlSpider
import re
from ..items import JdItem
import time


class JdProjectSpider(CrawlSpider):
    """Crawl JD.com category listing pages, then scrape each product's
    mobile (m.jd.com) page for price/shop data and its desktop (item.jd.com)
    page for specification data, yielding one JdItem per product.
    """
    name = 'jd_project'
    # allowed_domains = ['jd.com/','m.jd.com/']
    start_urls = ['https://list.jd.com/list.html?cat=9987,653,655&page=1&sort=sort_rank_asc&trans=1&JL=6_0_0#J_main/']
    # Index rule: follow every paginated listing URL that carries the same
    # sort/trans query string, extracting product SKUs from each page.
    rules = (
        Rule(LinkExtractor(allow=(r'&sort=sort_rank_asc&trans=1&JL=6_0_0',)), callback='parse_getgoodsID', follow=True),
    )

    @staticmethod
    def _first_match(pattern, html):
        """Return the first capture of *pattern* in *html*, or "" if absent."""
        found = re.findall(pattern, html)
        return found[0] if found else ""

    def parse_getgoodsID(self, response):
        """Extract product SKU ids from a listing page and request the
        mobile product page for each one."""
        for goodsID in response.xpath('//li[@class="gl-item"]/div/@data-sku').extract():
            # Build the mobile (APP-style) product page URL for this SKU.
            url_mobile = 'https://item.m.jd.com/product/{0}.html'.format(goodsID)
            # dont_filter: the same SKU may appear on several listing pages.
            yield scrapy.Request(url=url_mobile, meta={"goodsID": goodsID},
                                 callback=self.parse_getgoodsData_fromAPP, dont_filter=True)

    def parse_getgoodsData_fromAPP(self, response):
        """Scrape name, price, variant options and shop info from the mobile
        product page, then chain to the desktop page carrying the fields in
        request meta."""
        # body_as_unicode() was removed in Scrapy 2.6; response.text is the
        # drop-in replacement (decoded response body).
        html = response.text
        goodsID = response.meta["goodsID"]
        # Product name.
        goodsName = self._first_match(r'<div class="fn_text_wrap" id="itemName">(.*?)</div>', html)
        # Price: integer and fractional parts are captured separately.
        price_parts = re.findall(r'<em>(\d+)</em>.(\d*)', html)
        price = '.'.join(price_parts[0]) if price_parts else ""
        # Available product variants (embedded JSON fragments, space-joined).
        goodsSelect = ' '.join(re.findall(r'{"dim":\d+,".*?"skuId":"\d+"}', html))
        # Shop name and numeric shop id.
        shopName = self._first_match(r'"deliver":"(.*?)"', html)
        shopID = self._first_match(r'"id":(\d+)', html)
        # Only build a shop link when we actually have an id; otherwise the
        # link would end in a dangling "?shopId=".
        shopLink = 'https://shop.m.jd.com/?shopId=' + shopID if shopID else ""
        # Chain to the desktop product page for the specification fields.
        url_pc = 'https://item.jd.com/{0}.html'.format(goodsID)
        yield scrapy.Request(url=url_pc, callback=self.parse_getgoodsData_fromPC,
                             meta={"goodsID": goodsID, "goodsName": goodsName, "price": price,
                                   "goodsSelect": goodsSelect, "shopName": shopName,
                                   "shopID": shopID, "shopLink": shopLink})

    def parse_getgoodsData_fromPC(self, response):
        """Scrape specification fields from the desktop product page and
        yield the completed JdItem."""
        html = response.text
        item = JdItem()
        item["crawl_time"] = time.asctime()
        item["goodsID"] = response.meta["goodsID"]
        item["goodsName"] = response.meta["goodsName"]
        # Self-operated ("自营") flag: presence of the u-jd badge element.
        pattern_ziying = r'''<em class="u-jd">[^❤]+</em>'''
        item["ziying"] = '是' if re.findall(pattern_ziying, html) else '否'
        # Brand.
        item["goodsBrand"] = self._first_match(r"<li title='(.*?)'>品牌", html)
        # Full product name from the spec list.
        item["goodsBelong"] = self._first_match(r"<li title='.*?'>商品名称：(.*?)</li>", html)
        # Gross weight. NOTE: the item field name "weigth" (sic) is declared
        # in the external JdItem and must be kept as-is.
        item["weigth"] = self._first_match(r"<li title='.*?'>商品毛重：(.*?)</li>", html)
        # Place of origin.
        item["makein"] = self._first_match(r"<li title='.*?'>商品产地：(.*?)</li>", html)
        # Release date: concatenation of launch year and month; empty when
        # either part is missing (matches the original all-or-nothing logic).
        years = re.findall(r'<dt>上市年份</dt><dd>(.*?)</dd>', html)
        months = re.findall(r'<dt>上市月份</dt><dd>(.*?)</dd>', html)
        item["age"] = years[0] + months[0] if years and months else ""
        item["price"] = response.meta["price"]
        item["goodsSelect"] = response.meta["goodsSelect"]
        # BUG FIX: was response.meta["goodsName"], which overwrote the shop
        # name with the product name.
        item["shopName"] = response.meta["shopName"]
        item["shopID"] = response.meta["shopID"]
        item["shopLink"] = response.meta["shopLink"]
        item["goodsLink"] = response.url
        yield item






